require(stats)
require(graphics)
#library("ggplot2")
require(ggplot2)
#library("grDevices")
require(grDevices)
#library("psych")
require(psych)
library(gridExtra)
library(grid)

# Please make sure to change the path to where the appropriate dataset is located on your computer

sc.val <-  read.csv("PNAS_VAL.csv") 


# Columns

sponsor.require <- 1
p.tourist.visa <- 2
p.fakedocs <-  3
k <- 4
my.seed <- 5

legaltourist.attempts <- c(6:25)
ability.legal.cols <- c(26:86)
ability.fakedocs.cols <- c(87:147)
ability.work.cols <- c(148:208)

tot.mig <- c(209:269)
legal.mig <- c(270:330)
fd.mig <- c(331:391)
ut.mig <- c(392:452)
stud.mig <- c(453:513)
hs.mig <- c(514:574)
ls.mig <- c(575:635)
fam.mig <- c(636:696)

legal.start <- 232
ut.start <- 9
fd.start <-  9
tot.start <-  250

####

table(sc.val[,k])

### Attempted migration distribution

mean.legtour.attempts.k1 <- as.vector(colMeans(sc.val[which(sc.val[,k] == 1),legaltourist.attempts]))
mean.legtour.attempts.k2 <- as.vector(colMeans(sc.val[which(sc.val[,k] == 2),legaltourist.attempts]))
mean.legtour.attempts.k3 <- as.vector(colMeans(sc.val[which(sc.val[,k] == 3),legaltourist.attempts]))

# Percentiles

percentile.attempts.k1 <- apply(sc.val[which(sc.val[,k] == 1),legaltourist.attempts], 2, quantile, c(0.0275, 0.975))
percentile.attempts.k2 <- apply(sc.val[which(sc.val[,k] == 2),legaltourist.attempts], 2, quantile, c(0.0275, 0.975))
percentile.attempts.k3 <- apply(sc.val[which(sc.val[,k] == 3),legaltourist.attempts], 2, quantile, c(0.0275, 0.975))


survey.legtour.attempts <- c(878,165,82,22,6,4,1) # Pulled from survey data


categories <- c(1:length(mean.legtour.attempts.k1))
mf.mean.legtour.attempts.k1 <- categories * mean.legtour.attempts.k1
mean.mean.sim.att.k1 <- sum(mf.mean.legtour.attempts.k1) / sum(mean.legtour.attempts.k1)

categories <- c(1:length(mean.legtour.attempts.k2))
mf.mean.legtour.attempts.k2 <- categories * mean.legtour.attempts.k2
mean.mean.sim.att.k2 <- sum(mf.mean.legtour.attempts.k2) / sum(mean.legtour.attempts.k2)

categories <- c(1:length(mean.legtour.attempts.k3))
mf.mean.legtour.attempts.k3 <- categories * mean.legtour.attempts.k3
mean.mean.sim.att.k3 <- sum(mf.mean.legtour.attempts.k3) / sum(mean.legtour.attempts.k3)

mf.survey.legtour.attempts <- c(1:length(survey.legtour.attempts)) * survey.legtour.attempts
mean.survey.att <- sum(mf.survey.legtour.attempts) / sum(survey.legtour.attempts)

# median = half the data. find area within category 

(sum(mean.legtour.attempts.k1) / 2)
(sum(mean.legtour.attempts.k2) / 2)
(sum(mean.legtour.attempts.k3) / 2)

# Median = 0 

(sum(survey.legtour.attempts) / 2) 

# Median = 0

# CURVE FITTING IS DONE WITH THE CURVEFITTING TOOL IN MATLAB

# Normalising
sim.attempts.k1 <- mean.legtour.attempts.k1 / 1166
sim.attempts.k2 <- mean.legtour.attempts.k2 / 1166
sim.attempts.k3 <- mean.legtour.attempts.k3 / 1166

sim.attempts.p.k1 <- percentile.attempts.k1 / 1166
sim.attempts.p.k2 <- percentile.attempts.k2 / 1166
sim.attempts.p.k3 <- percentile.attempts.k3 / 1166

survey.attempts <- survey.legtour.attempts / 1166


# Make the lengths comparable by adding zeros to survey

survey.attempts2 <- c(survey.attempts,rep_len(0,5))


all <- as.table(rbind(survey.attempts2,sim.attempts.k1[1:12],sim.attempts.k2[1:12],sim.attempts.k3[1:12]))
colnames(all)[colnames(all)=="A"] <- "0"
colnames(all)[colnames(all)=="B"] <- "1"
colnames(all)[colnames(all)=="C"] <- "2"
colnames(all)[colnames(all)=="D"] <- "3"
colnames(all)[colnames(all)=="E"] <- "4"
colnames(all)[colnames(all)=="F"] <- "5"
colnames(all)[colnames(all)=="G"] <- "6"
colnames(all)[colnames(all)=="H"] <- "7"
colnames(all)[colnames(all)=="I"] <- "8"
colnames(all)[colnames(all)=="J"] <- "9"
colnames(all)[colnames(all)=="K"] <- "10"
colnames(all)[colnames(all)=="L"] <- "11"
colnames(all)[colnames(all)=="M"] <- "12"
rownames(all)[rownames(all)==""] <- "abm.attempts"


k.colors <- c("gray42",rgb(0,1,0.5, alpha = 0.9),rgb(0,1,0.5, alpha = 0.6), rgb(0,1,0.5, alpha = 0.4))


pdf("FigC5.pdf", width = 10, height = 7)
par(pty="s", mar=c(4,4,4,4), xpd=TRUE)
barplot(all, las=1, beside=T,col=k.colors,ylab="Proportion of Individuals",xlab="", 
        cex.names = 0.85, border = F)
for (i in 1:ncol(all))
{
  segments((2.4 + (5*(i-1))),sim.attempts.p.k1[1,i],(2.4 + (5*(i-1))),sim.attempts.p.k1[2,i], lty=1)
  segments((3.4 + (5*(i-1))),sim.attempts.p.k2[1,i],(3.4 + (5*(i-1))),sim.attempts.p.k2[2,i], lty=1)
  segments((4.4 + (5*(i-1))),sim.attempts.p.k3[1,i],(4.4 + (5*(i-1))),sim.attempts.p.k3[2,i], lty=1)
}
legend(x=35,y=0.6, c("Survey","ABM, k=1", "ABM, k=2", "ABM, k=3"), cex = 1, fill=k.colors,horiz=F, border = F, bty="n")
mtext("Visa Denials",side=1, line=2.5)
dev.off()
